Bruvo.distance <- function(genotype1, genotype2, maxl=9, usatnt=2, missing=-9) {

require(combinat) # for permn

if(identical(genotype1, genotype2)) {
    dist <- 0
} else {
    if((length(genotype1)>maxl & length(genotype2)>maxl)|
       genotype1[1]==missing|genotype2[1]==missing){
        dist <- NA
    } else {
        if(length(genotype1) >= length(genotype2)) {
            genotypeL <- genotype1/usatnt; genotypeS <- genotype2/usatnt
        } else {
                genotypeL <- genotype2/usatnt; genotypeS <- genotype1/usatnt
            }

# if genotypes are identical, just return a distance of zero without doing the rest
        # of the calculation
# if genotypes are both longer than 9, skip this calculation because it will take hours
# whichever genotype has more alleles, make this genotypeL (long) and the other
        # genotypeS (short)
# convert alleles into repeat counts by dividing by usatnt

kl <- length(genotypeL) # sets the ploidy level for this genotype comparison
ks <- length(genotypeS) # number of alleles in the shorter genotype

allele.distances <- array(0 , c(kl,ks))
# Create an empty matrix to contain the raw distances between alleles

for(n in 1:kl) { for(m in 1:ks) {allele.distances[n,m] <- genotypeL[n] - genotypeS[m]}}
# fills the array with the differences in allele repeat count

geometric.distances <- array(1 - 2^-abs(allele.distances) , c(kl,ks))
# geometric transformation based on mutation probabilities

#Next, find the minimum distance sum among all permutations

column <- 1:ks # an index of all columns (genotypeS alleles)
row <- 1:kl # an index of all rows (genotypeL alleles)

combinations <- combn(row, ks, FUN = NULL, simplify=FALSE)
# all combinations of alleles in genotypeL that can be matched to non-virtual
        # alleles in genotypeS

permutations <- permn(ks)
# all possible orders that alleles within these combinations can go in

mindist <- Inf # this variable will store the minimum sum encountered so far.

for(i in 1:length(combinations)) {
    # the loop to go through every possible sum of compatible allele comparisons
    rowcomb <- combinations[[i]] # choose one combination of rows for this round

    for(l in 1:length(permutations)){ # go through all orders of this combinations of rows
        sum <- 0 # this is si, the sum of allele comparisons

        # the loop to calculate the sum for this permutation
        for(j in 1:ks){
            sum <- sum + geometric.distances[rowcomb[permutations[[l]][j]],column[j]]
        }
        if(sum < mindist) {mindist <- sum} # is this the minimum sum found so far?
    }
}

dist <- (mindist+kl-ks)/kl
# add 1 for each infinite virtual allele, then divide by the ploidy

}}
return(dist)
}

read.GeneMapper<-function(infiles, missing=-9){
    samples<-c()
    loci<-c()
    locusdata<-list()
    length(locusdata)<-length(infiles)
    for(i in 1:length(infiles)){
        locusdata[[i]]<-read.table(infiles[i],sep="\t",header=TRUE,
                                   as.is=c("Sample.Name","Marker"))
        samples<-c(samples,locusdata[[i]][["Sample.Name"]])
        loci<-c(loci,locusdata[[i]][["Marker"]])
    }
    samples<-unique(samples)
    loci<-unique(loci)
    genotypedata<-array(list(missing),c(length(samples), length(loci)),
                            list(samples,loci))
    for(m in 1:length(locusdata)){
        alleleindex<-grep("Allele",names(locusdata[[m]]),value=FALSE)
        for(j in 1:length(locusdata[[m]][["Sample.Name"]])){
                untrimmedgenotype<-locusdata[[m]][j,alleleindex]
                #Gives an error if there are loci or samples not in the arguments
                genotypedata[[locusdata[[m]][["Sample.Name"]][j],
                              locusdata[[m]][["Marker"]][j]]]<-
                    untrimmedgenotype[!is.na(untrimmedgenotype)]
            }
        }
        return(genotypedata)
}

distance.matrix.1locus<-function(gendata, distmetric=Bruvo.distance,
                                 progress=TRUE, ...){
    count<-length(gendata) # how many genotypes there are
    distances<-matrix(nrow=count, ncol=count,
                      dimnames=list(names(gendata),names(gendata)))
    for(m in 1:count){for(n in m:count){
        thisdistance<-distmetric(gendata[[m]],gendata[[n]], ...)
        distances[m,n]<-thisdistance
        distances[n,m]<-thisdistance
        if(progress){print(c(names(gendata)[[m]],names(gendata)[[n]]))}
    }}
    return(distances)
}

meandistance.matrix <- function(gendata, samples=dimnames(gendata)[[1]],
                                 loci=dimnames(gendata)[[2]], all.distances=FALSE,
                                 usatnts=NULL, ...){

# create an array containing all distances by locus and sample
    loci.matrices<-array(dim=c(length(loci),length(samples),length(samples)),
                         dimnames=list(loci,samples,samples))
    for(i in loci){
       if(is.null(usatnts)){
           loci.matrices[i,,]<-distance.matrix.1locus(gendata[samples,i],...)
       } else {
           loci.matrices[i,,]<-distance.matrix.1locus(gendata[samples,i],
                                                      usatnt=usatnts[i],...)
       }
    }

    # calculate the mean matrix across all loci
    mean.matrix<-matrix(nrow=length(samples),ncol=length(samples),
                        dimnames=list(samples,samples))
    for(j in samples){
        for(k in samples){
            mean.matrix[j,k]<-mean(loci.matrices[,j,k][!is.na(loci.matrices[,j,k])])
        }
    }

    # return either the mean matrix and possibly the array as well
    if(all.distances){
        return(list(DistByLoc=loci.matrices, MeanMatrix=mean.matrix))
    } else {
        return(mean.matrix)
    }
}

write.Structure <- function(gendata, ploidy, file="",
                            samples=dimnames(gendata)[[1]], loci=dimnames(gendata)[[2]],
                            indploidies=rep(ploidy,times=length(samples)),
                            extracols=NULL, missing=-9){
    if(is.null(names(indploidies))) {names(indploidies)<-samples}
    structdata <- data.frame(rowlabel=c("missing",rep(samples, each=ploidy)))
    thiscol = 1 # which column we are working with

    # Fill in extra columns.  Ignored if extracols=NULL.
    for(excol in dimnames(extracols)[[2]]){
        thiscol <- thiscol+1
        structdata[[thiscol]]<-c(0,rep(extracols[samples, excol], each=ploidy))
        names(structdata)[thiscol]<-excol
    }

    # Fill in the genotypes
    for(locus in loci){
        thiscol<-thiscol+1
        alleles<-missing #set up the vector of alleles
        for(s in samples){
            #If missing data must be inserted to reflect a lower ploidy level:
            if(indploidies[s] < ploidy){
                if(length(gendata[[s,locus]]) == indploidies[s]){
                    #fully heterozygous genotype
                    thesealleles <- c(gendata[[s,locus]],
                                    rep(missing, times=ploidy-indploidies[s]))
              } else {
                  if(length(gendata[[s,locus]]) < indploidies[s]){
                      #duplicate the first allele to get to the right ploidy
                      thesealleles <- c(gendata[[s,locus]],
                                        rep(gendata[[s,locus]][1],
                                            times=indploidies[s]-
                                            length(gendata[[s,locus]])),
                                        rep(missing, times=ploidy-indploidies[s]))
                  } else {
                      #randomly choose alleles to use if there are too many
                      thesealleles <- c(sample(gendata
                                       [[s,locus]],indploidies[s],replace=FALSE),
                                        rep(missing, times=ploidy-indploidies[s]))
                  }
              }
            #If the individual has equal or greater ploidy to that
                #being used in the file:
            } else {
                if(length(gendata[[s,locus]]) == ploidy){
                    #genotype fills available spaces
                    thesealleles <- gendata[[s,locus]]
                } else {
                    if(length(gendata[[s,locus]]) < ploidy){
                        #duplicate the first allele to get to the right ploidy
                        thesealleles<-c(gendata[[s,locus]],
                                        rep(gendata[[s,locus]][1],
                                            times=ploidy-length(gendata[[s,locus]])))
                    } else {
                        #randomly choose alleles to use if there are too many
                        thesealleles<-sample(gendata[[s,locus]],ploidy,replace=FALSE)
                    }
                }
            }
            alleles<-c(alleles,thesealleles)
        }
        structdata[[thiscol]]<-alleles
        names(structdata)[thiscol]<-locus
    }
   write.table(structdata, file=file, sep="\t", row.names=FALSE,
               quote=FALSE, col.names=TRUE)
}

estimate.ploidy <- function(gendata, samples=dimnames(gendata)[[1]],
                            loci=dimnames(gendata)[[2]]){
    # set up array to contain the maximum and average number of alleles
    ploidyinfo <- array(dim=c(length(samples),2),
                        dimnames=list(samples, c("max.alleles","mean.alleles")))

    # fill the array
    for(s in samples){
        numalleles<-mapply(length,gendata[s,loci])
        ploidyinfo[s,"max.alleles"] <- max(numalleles)
        ploidyinfo[s,"mean.alleles"] <- mean(numalleles)
    }

    return(ploidyinfo)
}

read.Tetrasat <- function(infile,missing=-9){
    #read the file into a character vector, containing all the lines
    rawdata<-readLines(infile)
    #find which lines delimit populations
    popindex<-grep("pop",rawdata,ignore.case=TRUE,value=FALSE)
    #get a character vector of loci, whether they were stored in one or
    #several lines
    loci<-rawdata[2:(popindex[1]-1)]
    if(length(loci) == 1){loci<-strsplit(loci,",")[[1]]}
    #find which lines contain genotype data
    samindex<-c(popindex[1]:length(rawdata))
    samindex<-samindex[!samindex %in% popindex]
    #make a two-dimensional list to hold the genotypes
    gendata<-array(list(missing),dim=c(length(samindex),length(loci)),
                   dimnames=list(c(1:length(samindex)),loci))
    #make a vector containing population data
    popdata<-c()
    length(popdata)<-length(samindex)

    #Extract the data out of the lines
    for(i in 1:length(samindex)){
        #Find the largest value in popindex that is smaller than samindex[i]
        #Use the position of that value in popindex for the pop id
        popdata[i]<-match(max(popindex[popindex < samindex[i]]),popindex)
        #extract sample names and genotypes
        samname<-gsub(" ","",substring(rawdata[samindex[i]],1,20),fixed=TRUE)
        names(popdata)[i]<-samname
        dimnames(gendata)[[1]][i]<-samname
        thesegenotypes<-substring(rawdata[samindex[i]],
                                  seq(length=length(loci),from=21,by=9),
                                  seq(length=length(loci),from=28,by=9))
        for(j in 1:length(loci)){
            thesealleles<-gsub(" ","",thesegenotypes[j],fixed=TRUE)
            if(nchar(thesealleles) != 0){
              thesealleles<-substring(thesealleles,
                                      seq(length=nchar(thesealleles)/2,from=1,by=2),
                                      seq(length=nchar(thesealleles)/2,from=2,by=2))
              thesealleles<-unique(as.integer(thesealleles))
            } else {thesealleles<-missing}
            gendata[[i,j]]<-thesealleles
        }
    }
    #return genotype and pop data
    return(list(PopData=popdata,Genotypes=gendata))
}

read.ATetra<-function(infile){
    #read the file into a list of vectors, one for each line
    rawdata<-strsplit(readLines(infile),",")
    #make an index of which lines contain locus and sample info
    #also make an index of all loci
    locindex<-c()
    samindex<-c()
    for(i in 1:length(rawdata)){
        if(rawdata[[i]][1] == "LOC"){
            locindex<-c(locindex,i)
            names(locindex)[length(locindex)]<-rawdata[[i]][3]
        }
        if(rawdata[[i]][1] == "IND"){
            samindex<-c(samindex,i)
        }
    }
    #make a vector of all loci
    loci<-names(locindex)
    #make a vector of all samples, and a vector of which samples go in which pops
    samples<-c()
    popdata<-c()
    samindex1loc<-samindex[samindex < locindex[2]]
    for(j in 1:length(samindex1loc)){
        samples[j]<-rawdata[[samindex1loc[j]]][5]
        popdata[j]<-as.integer(rawdata[[samindex1loc[j]]][3])
    }
    names(popdata)<-samples
    #set up the array to contain genotypes
    gendata<-array(list(missing),dim=c(length(samples),length(loci)),
                   dimnames=list(samples,loci))
    #fill the array of genotypes
    for(m in samindex){
        thesealleles<-rawdata[[m]][6:9]
        thesealleles<-as.integer(thesealleles[thesealleles !=""])
        thesealleles<-thesealleles[!is.na(thesealleles)]
        gendata[[as.integer(rawdata[[m]][4]),as.integer(rawdata[[m]][2])]]<-thesealleles
    }
    #return population data and genotypes
    return(list(PopData=popdata,Genotypes=gendata))
}

dominant.to.codominant<-function(domdata,colinfo=NULL,samples=dimnames(domdata)[[1]],
                                 missing=-9,allelepresent=1,split="."){
    #get locus and allele information
    if(is.null(colinfo)){
        rawcol<-strsplit(dimnames(domdata)[[2]],split,fixed=TRUE)
        loc<-c()
        alleles<-c()
        for(i in 1:length(rawcol)){
            loc[i]<-rawcol[[i]][1]
            alleles[i]<-as.integer(rawcol[[i]][2])
        }
        colinfo<-data.frame(Loci=loc, Alleles=alleles)
    }

    #extract a list of loci
    loci<-unique(colinfo[[1]])
    #set up the genotype list
    gendata<-array(list(missing),dim=c(length(samples),length(loci)),
                   dimnames=list(samples,loci))
    #fill the genotype vectors
    for(n in 1:dim(domdata)[2]){
        for(m in samples){
            if(domdata[m,n] == allelepresent){
                gendata[[m,colinfo[n,1]]]<-c(gendata[[m,colinfo[n,1]]],colinfo[n,2])
                gendata[[m,colinfo[n,1]]]<-
                    gendata[[m,colinfo[n,1]]][gendata[[m,colinfo[n,1]]] != missing]
            }
        }
    }
    #return codominant genotypes
    return(gendata)
}

codominant.to.dominant<-function(gendata, makecolinfo=FALSE, allelepresent=1, alleleabsent=0,
                                 missingin=-9, missingout=-9,
                                 loci=dimnames(gendata)[[2]], samples=dimnames(gendata)[[1]]){
    # Find all unique alleles for each locus
    locvector<-c()
    allelevector<-c()
    for(l in loci){
        thesealleles<-c()
        for(s in samples){
            thesealleles<-c(thesealleles,gendata[[s,l]])
        }
        thesealleles<-unique(thesealleles)
        thesealleles<-thesealleles[thesealleles != missingin]
        thesealleles<-sort(thesealleles)
        locvector<-c(locvector,rep(l,length(thesealleles)))
        allelevector<-c(allelevector,thesealleles)
    }
    # Build data frame of locus and allele information.
    colinfo<-data.frame(Loci=locvector,Alleles=allelevector)
    # Set up matrix of dominant data
    domdata<-matrix(nrow=length(samples), ncol=dim(colinfo)[1],
                    dimnames=list(samples,1:dim(colinfo)[1]))
    # Search for alleles and fill matrix with presence and absence symbols
    for(m in 1:dim(colinfo)[1]){
        for(s in samples){
            if(gendata[[s,colinfo[m,1]]][1] == missingin){
                domdata[s,m]<-missingout
            } else {
            if(colinfo[[m,2]] %in% gendata[[s,colinfo[m,1]]]){
                domdata[s,m]<-allelepresent
            } else { domdata[s,m]<-alleleabsent } }
        }
        # Create the locus.allele name for the column
        dimnames(domdata)[[2]][m]<-paste(colinfo[m,1],colinfo[m,2], sep=".")
    }

    if(makecolinfo) return(list(Domdata=domdata, Colinfo=colinfo))
    else return(domdata)
}

read.GenoDive <- function(infile,missing=-9){
    rawdata<-readLines(infile)
    #information about number of samples, number of loci, etc. is in second line
    datainfo<-as.integer(strsplit(rawdata[[2]],"\t")[[1]])
    numsam<-datainfo[1] # number of individuals
    numpop<-datainfo[2] # number of populations
    numloc<-datainfo[3] # number of loci
    digits<-datainfo[5] # number of digits used to code each allele

    # get the data header with column names, including loci
    colheader<-strsplit(rawdata[[3+numpop]],"\t")[[1]]
    lastloc<-length(colheader) # index of the last locus column
    firstloc<-lastloc-numloc+1 # index of the first locus column (3 or 4)
    loci<-colheader[firstloc:lastloc] # get the locus names

    # set up the list to contain the genotype data and the vector to contain the
    # population info
    gendata<-array(list(missing),dim=c(numsam,numloc),dimnames=list(1:numsam,loci))
    popinfo<-rep(0,times=numsam)
    names(popinfo)<-1:numsam

    # fill the list and the vector!
    for(s in 1:numsam){
        # get data from the row for this sample
        sampledata<-strsplit(rawdata[[3+numpop+s]],"\t")[[1]]
        # extract name and population
        popinfo[s]<-as.integer(sampledata[1])
        names(popinfo)[s]<-sampledata[2]
        dimnames(gendata)[[1]][s]<-sampledata[2]
        # extract alleles
        for(L in 1:numloc){
            rawalleles<-sampledata[(firstloc:lastloc)[L]]
            while(nchar(rawalleles)%%digits !=0) {
                # If leading zeros were removed
                rawalleles<-paste("0",rawalleles,sep="")
            }
            # convert the character string to an integer vector
            myfirst<-seq(length=nchar(rawalleles)/digits,from=1,by=digits)
            mylast<-seq(length=nchar(rawalleles)/digits,from=digits,by=digits)
            thesealleles<-as.integer(substring(rawalleles,myfirst,mylast))
            # get rid of duplicate alleles
            thesealleles<-unique(thesealleles)
            # insert the missing data symbol as appropriate
            if(length(thesealleles)==1 && thesealleles==0){thesealleles<-missing}
            # get rid of "missing alleles" if the whole genotype is not missing
            thesealleles<-thesealleles[thesealleles !=0]
            # write the allele vector to the list
            gendata[[s,L]]<-thesealleles
        }
    }
    # return the vector and list
    return(list(PopData=popinfo,Genotypes=gendata))
}

write.GenoDive<-function(gendata, popnames="onebigpop",
                         commentline="file description goes here",
                         digits=2, file="", samples=dimnames(gendata)[[1]],
                         loci=dimnames(gendata)[[2]], popinfo=rep(1,times=length(samples)),
                         usatnts=rep(2,times=length(loci)), missing=-9){
    # fill in names if popinfo and usatnts are default
    if(is.null(names(popinfo))){names(popinfo)<-samples}
    if(is.null(names(usatnts))){names(usatnts)<-loci}
    # get some info for the second line of the file
    numsam<-length(samples)
    numloc<-length(loci)
    numpop<-length(popnames)
    if(numpop != length(unique(popinfo[samples]))){
        cat("Warning: number of populations in popnames and popinfo is different.\n")
    }
    maxploidy<-max(estimate.ploidy(gendata[samples,loci])[,"max.alleles"])

    # start a character vector to contain all the lines of the file
    lines<-c(commentline,paste(numsam,numpop,numloc,maxploidy,digits, sep="\t"))
    lines[3:(numpop+2)]<-popnames
    lines[numpop+3]<-paste("Population\tIndividual",paste(loci,sep="",collapse="\t"),sep="\t")

    # enter population and sample names
    for(s in 1:numsam){
        lines[numpop+3+s]<-paste(popinfo[samples[s]], samples[s], sep="\t")
    }

    # process alleles and write them to lines
    for(L in loci){
        # replace missing data with zeros
        repmiss<-function(value){
            if(value[1]==missing){ 0 } else {value}
        }
        convertedalleles<-mapply(repmiss, gendata[samples,L],SIMPLIFY=FALSE)
        # convert alleles to repeat number
        divallele<-function(value){floor(value/usatnts[L])}
        convertedalleles<-mapply(divallele,convertedalleles,SIMPLIFY=FALSE)
        # subtract if necessary to get them to the right number of digits
        suballele<-function(value){if(value[1]!=0){
            value-(10^(digits-1))} else{
            0}}
        while(max(mapply(max,convertedalleles)) >= 10^digits){
            convertedalleles<-mapply(suballele, convertedalleles, SIMPLIFY=FALSE)
        }
        # For each sample, concatenate into strings and add to lines
        for(s in 1:numsam){
            # Convert alleles to character and set up string for concatenation
            charalleles<-as.character(convertedalleles[[s]])
            allelestring<-""
            # For each allele:
            for(ca in charalleles){
                allele<-ca
                # Add zeros if necessary
                while(nchar(allele) < digits){
                    allele<-paste("0",allele,sep="")
                }
                # add this allele to the string
                allelestring<-paste(allelestring,allele,sep="")
            }
            # Add the allele string to the line for that sample.
            lines[numpop+3+s]<-paste(lines[numpop+3+s],allelestring,sep="\t")
        }
    }

    # write the file
    cat(lines, file=file, sep="\n")
}

write.Tetrasat<-function(gendata, commentline="insert data description here",
                         samples=dimnames(gendata)[[1]], loci=dimnames(gendata)[[2]],
                         popinfo=rep(1, length(samples)), usatnts=rep(2, length(loci)),
                         file="", missing=-9){
    # index popinfo and usatnts if not already done
    if(is.null(names(popinfo))){names(popinfo)<-samples}
    if(is.null(names(usatnts))){names(usatnts)<-loci}
    allpops<-unique(popinfo) # a vector of populations to cycle through
    # set up vector of lines to write to file
    lines<-c(commentline,loci)
    # where does population/genotype data begin?
    datastart<-length(lines) + 1

    # make lines with "Pop" and sample names, and an index of where samples are
    sampleindex<-as.integer(c())
    currentline<-datastart
    for(pop in allpops){
        lines[currentline]<-"Pop"
        currentline<-currentline+1
        # make a vector of all samples that belong to this population
        thesesamples<-samples[popinfo[samples]==pop]
        # make an index of the lines that they will go on
        indextoadd<-currentline:(currentline+length(thesesamples)-1)
        names(indextoadd)<-thesesamples
        # add this to the sample index
        sampleindex<-c(sampleindex,indextoadd)
        # write each sample name (plus spaces up to 20 characters) to the
        # appropriate line
        for(s in thesesamples){
            samname<-s
            while(nchar(samname)<20){
                samname<-paste(samname," ", sep="")
            }
            lines[currentline]<-samname
            currentline<-currentline+1
        }
    }

    # now go through the loci, convert the genotypes, and fill them in
    for(L in loci){
        # convert alleles to repeat number
        divallele<-function(value){
            if(value[1]==missing){
                value
            } else {
                floor(value/usatnts[L])
            }
        }
        convertedalleles<-mapply(divallele, gendata[samples,L],SIMPLIFY=FALSE)
        # make sure alleles have two digits or fewer
        suballele<-function(value){
            if(value[1]==missing){
                value
            } else {
                value-10
            }
        }
        while(max(mapply(max,convertedalleles)) >= 100){
            convertedalleles<-mapply(suballele, convertedalleles, SIMPLIFY=FALSE)
        }
        # go through the genotypes by sample
        for(s in samples){
            # convert missing data to blank spaces, convert to alleles to characters
            if(convertedalleles[[s]][1]==missing){
                charalleles<-"         "
            } else {
                charalleles<-as.character(convertedalleles[[s]])
                # duplicate allele if fully homozygous
                if(length(charalleles)==1){
                    charalleles<-rep(charalleles,4)
                }
                # randomly remove alleles if there are more than 4
                if(length(charalleles)>4){
                    charalleles<-sample(charalleles,4,replace=FALSE)
                    cat(c("Alleles randomly removed:",s,L,"\n"),sep=" ")
                }
            }
            # concatenate all alleles into one string
            allelestring<-""
            for(ca in charalleles){
                allele<-ca
                # Add zeros if necessary
                while(nchar(allele) < 2){
                    allele<-paste("0",allele,sep="")
                }
                # add this allele to the string
                allelestring<-paste(allelestring,allele,sep="")
            }
            # add spaces up to nine characters
            while(nchar(allelestring) < 9){
                allelestring<-paste(allelestring," ", sep="")
            }
            # add the concatenated genotype to the appropriate line
            lines[sampleindex[s]]<-paste(lines[sampleindex[s]],allelestring,sep="")
        }
    }

    cat(lines, sep="\n", file=file)
}

write.ATetra<-function(gendata, samples=dimnames(gendata)[[1]], loci=dimnames(gendata)[[2]],
                       popinfo=rep(1,length(samples)), popnames="onebigpop",
                       commentline="insert data info here", missing=-9, file=""){
    # name popinfo if necessary
    if(is.null(names(popinfo))){names(popinfo)<-samples}
    # set up a character vector to hold the lines for the file
    lines<-paste("TIT",commentline, sep=",")
    currentline<-2 # a variable to say which line to write to next
    # make numbers to go with loci, pops, and samples
    locnums<-1:length(loci)
    names(locnums)<-loci
    samnums<-1:length(samples)
    names(samnums)<-samples
    popnums<-1:length(popnames)
    names(popnums)<-popnames

    if(length(popnames) != length(unique(popinfo))){
      cat("Warning: number of populations in popnames and popinfo is different.\n")
    }

    # fill in data using a loop
    for(L in loci){
        # write a line describing the locus
        lines[currentline]<-paste("LOC",locnums[L],L,sep=",")
        currentline<-currentline+1
        for(p in popnames){
            # write a line describing the population
            lines[currentline]<-paste("POP",locnums[L],popnums[p],p, sep=",")
            currentline<-currentline+1
            # get a vector of individuals in this population
            thesesamples<-names(popinfo)[popinfo==popnums[p]]
            thesesamples<-thesesamples[thesesamples %in% samples]
            # write lines for individuals
            for(s in thesesamples){
                # first put sample info into the line
                lines[currentline]<-paste("IND",locnums[L],popnums[p],samnums[s],s,sep=",")
                # get the alleles and print a warning if there is missing data
                thesealleles<-gendata[[s,L]]
                if(thesealleles[1]==missing){
                    thesealleles<-""
                    cat("Missing data:",s,L,"\n",sep=" ")
                }
                # take a random sample if there are more than 4
                if(length(thesealleles)>4){
                    thesealleles<-sample(thesealleles,4,replace=FALSE)
                    cat("More than 4 alleles:",s,L,"\n",sep=" ")
                }
                # add alleles to the line
                for(a in 1:4){
                    if(length(thesealleles)>=a ){
                      lines[currentline]<-paste(lines[currentline],thesealleles[a],sep=",")
                  } else {
                      lines[currentline]<-paste(lines[currentline],"",sep=",")
                  }
                }
                currentline<-currentline+1
            }
        }
    }

    # write the "END-record"
    lines[currentline]<-"END"

    # write the file
    cat(lines, sep="\n", file=file)
}

read.Structure<-function(infile,missingin=-9,missingout=-9,sep="\t",markernames=TRUE,
                         labels=TRUE, extrarows=1, extracols=0, ploidy=4,
                         getexcols=FALSE
                         ){
    # read the file
    rawdata<-read.table(infile,header=markernames,sep=sep)
    # get an index of samples and a column labeling the samples
    if(labels){
        # if row labels are used, get those as the sample names
        samples<-unique(rawdata[[1]])
        samples<-samples[!is.na(samples)]
        samples<-as.character(samples)
        samples<-samples[samples != ""]
        names(rawdata)[1]<-"Samples"
    } else {
        # make an integer vector to represent samples
        samples<-1:((dim(rawdata)[1]-extrarows)/ploidy)
        samindex<-c(rep(0,times=extrarows),rep(samples, each=ploidy))
        rawdata[length(rawdata)+1]<-samindex
        names(rawdata)[length(rawdata)]<-"Samples"
    }
    # get an index of loci (will be V2 etc. if loci not named)
    loci<-names(rawdata)
    loci<-loci[loci != "Samples"]
    loci<-loci[(extracols+1):length(loci)]

    # set up the list to store the genotypes
    gendata<-array(list(missingout),dim=c(length(samples),length(loci)),
                   dimnames=list(samples,loci))

    # fill the list
    for(L in loci){
        for(s in samples){
            rawalleles<-rawdata[rawdata$Samples==s,L]
            # process missing data and get unique alleles
            thesealleles<-unique(rawalleles)
            if(length(thesealleles)==1 && thesealleles[1]==missingin){
                thesealleles<-missingout
            } else {
                thesealleles<-thesealleles[thesealleles != missingin]
            }
            gendata[[s,L]]<-thesealleles
        }
    }

    # extract the extra columns, if needed
    if(getexcols){
        Extracol<-data.frame(row.names=samples)
        if(labels){
            colsbeforex<-1
        } else {
            colsbeforex<-0
        }
        for(x in extracols){
            # look up values by sample in this column
            # add the column to the data frame
            Extracol[[x]]<-rawdata[[x+colsbeforex]][seq(1+extrarows,
                                                        length(rawdata[[1]])+1-ploidy,
                                                        by=ploidy)]
        }
    }

    # return extra columns and genotypes
    if(getexcols){
        return(list(ExtraCol=Extracol, Genotypes=gendata))
    } else {
        return(gendata)
    }
}

write.GeneMapper<-function(gendata,file="",samples=dimnames(gendata)[[1]],
                           loci=dimnames(gendata)[[2]]){
    # figure out how many allele columns are needed
    numallelecol<-max(estimate.ploidy(gendata[samples,loci])[,"max.alleles"])
    # figure out how many rows are needed
    numrows<-length(samples)*length(loci)
    # set up data frame
    gentable<-data.frame(Sample.Name=rep(samples, times=length(loci)),
                         Marker=rep(loci, each=length(samples)))
    # put empty allele columns into data frame
    alcollabels<-paste("Allele.",1:numallelecol,sep="")
    for(ac in 1:numallelecol){
        gentable[[ac+2]]<-rep("",times=numrows)
        names(gentable)[ac+2]<-alcollabels[ac]
    }

    # put alleles into their respective cells
    currentrow<-1
    for(L in loci){
        for(s in samples){
            thesealleles<-as.character(gendata[[s,L]])
            gentable[currentrow,3:(length(thesealleles)+2)]<-thesealleles
            currentrow<-currentrow+1
        }
    }

    # write the table to file
    write.table(gentable, file=file, sep="\t", quote=FALSE, row.names=FALSE, col.names=TRUE)
}

Lynch.distance<-function(genotype1,genotype2,missing=-9){
    if(genotype1[1]==missing || genotype2[1]==missing){
        # return NA if there is any missing data
        distance<-NA
    } else {
        # get the average number of bands for the two genotypes
        meanbands<-(length(genotype1)+length(genotype2))/2
        # find how many bands the genotypes have in common
        commonbands<-length(genotype1[genotype1 %in% genotype2])
        # calculate the distance
        distance<- 1-(commonbands/meanbands)
    }

    # return distance
    return(distance)
}

meandist.from.array<-function(distarray, samples=dimnames(distarray)[[2]],
                              loci=dimnames(distarray)[[1]]){
    # get the array to be averaged
    subarray<-distarray[loci,samples,samples]
    # make a matrix to put the means into
    mean.matrix<-matrix(nrow=length(samples),ncol=length(samples),
                        dimnames=list(samples,samples))
    for(j in samples){
        for(k in samples){
            mean.matrix[j,k]<-mean(subarray[,j,k][!is.na(subarray[,j,k])])
        }
    }
    return(mean.matrix)
}

find.na.dist<-function(distarray, samples=dimnames(distarray)[[2]],
                              loci=dimnames(distarray)[[1]]){
    # set up vectors for data frame to contain info on where missing data is
    Locus<-""
    Sample1<-""
    Sample2<-""
    # current row in the data frame
    currrow<-1
    # go through the array, find NA, and put the index into the data frame
    for(L in loci){
        for(s1 in samples){
            for(s2 in samples){
                if(is.na(distarray[L,s1,s2])){
                    Locus[currrow]<-L
                    Sample1[currrow]<-s1
                    Sample2[currrow]<-s2
                    currrow<-currrow+1
                }
            }
        }
    }
    #return data frame
    return(data.frame(Locus=Locus, Sample1=Sample1, Sample2=Sample2, stringsAsFactors=FALSE))
}

find.missing.gen<-function(gendata, missing=-9, samples=dimnames(gendata)[[1]],
                           loci=dimnames(gendata)[[2]]){
    # set up vectors to contain the indices to put into the data frame
    Locus <- c("")
    Sample <- c("")
    # current row in the data frame
    currrow<-1
    # find which data are missing
    for(L in loci){
        for(s in samples){
            if(gendata[[s,L]][1]==missing){
                Locus[currrow]<-L
                Sample[currrow]<-s
                currrow<-currrow+1
            }
        }
    }
    # return the data frame
    return(data.frame(Locus=Locus,Sample=Sample, stringsAsFactors=FALSE))
}

# find NA distances that aren't the result of missing data
find.na.dist.not.missing<-function(gendata, distarray, missing=-9,
                                   samples=dimnames(distarray)[[2]],
                                   loci=dimnames(distarray)[[1]]){
    # get the data frames of NA distances and missing genotypes
    na.dist<-find.na.dist(distarray,samples=samples,loci=loci)
    missing.gen<-find.missing.gen(gendata,missing=missing,samples=samples,loci=loci)
    # set up vectors for data frame to contain results
    Locus<-""
    Sample1<-""
    Sample2<-""
    currrow<-1
    # for each row of na.dist, look for that locus and samples in missing.gen
    for(i in 1:length(na.dist$Locus)){
        L<-na.dist$Locus[i]
        s1<-na.dist$Sample1[i]
        s2<-na.dist$Sample2[i]
        missthislocus<-missing.gen[missing.gen$Locus==L,]
        if(identical(c(s1,s2) %in% missthislocus$Sample, c(FALSE,FALSE))){
            Locus[currrow]<-L
            Sample1[currrow]<-s1
            Sample2[currrow]<-s2
            currrow<-currrow+1
        }
    }
    # return data frame
    return(data.frame(Locus=Locus, Sample1=Sample1, Sample2=Sample2, stringsAsFactors=FALSE))
}

estimate.freq<-function(gendata, missing=-9, samples=dimnames(gendata)[[1]],
                        loci=dimnames(gendata)[[2]], popinfo=rep(1,length(samples)),
                        indploidies=rep(4,length(samples))){
    # name popinfo and indploidies if necessary
    if(is.null(names(popinfo))){
        names(popinfo)<-samples
    }
    if(is.null(names(indploidies))){
        names(indploidies)<-samples
    }
    # get presence/absence data for alleles, and the table of which allele and locus
    # is in which column
    gentable<-codominant.to.dominant(gendata[samples,loci], makecolinfo=TRUE,
                                     missingin=missing)
    colinfo<-gentable[[2]]
    gentable<-gentable[[1]]
    # get a list of populations
    pops<-sort(unique(popinfo[samples]))
    # get total number of genomes per population
    totgenomes<-rep(0,length(pops))
    names(totgenomes)<-pops
    for(p in pops){
        totgenomes[p]<-sum(indploidies[samples[popinfo[samples]==p]])
    }

    # set up data frame to contain allele frequencies
    freqtable<-data.frame(Genomes=totgenomes,row.names=pops)

    # loop to get frequency data
    for(L in loci){
        # get all samples without missing data at this locus
        xsamples<-samples[gentable[,match(L,colinfo[[1]])]!=-9]
        # get the total number of genomes per population
        totgenomes<-rep(0,length(pops))
        names(totgenomes)<-pops
        for(p in pops){
            totgenomes[p]<-sum(indploidies[xsamples[popinfo[xsamples]==p]])
        }
        # make a conversion factor to weight allele presence based on ploidy
        # of each individual and number of alleles at this locus
        numalleles<-sapply(gendata[xsamples,L],length)
        names(numalleles)<-xsamples
        convf<-indploidies[xsamples]/numalleles
        # convert alleles in the table to estimated copy number
        loctable<-gentable[xsamples,colinfo[[1]]==L]*convf
        # loop through alleles at this locus
        for(al in colinfo[[2]][colinfo[[1]]==L]){
            theseallelefreqs<-rep(0,length(pops))
            names(theseallelefreqs)<-pops
            # loop through populations
            for(p in pops){
                theseallelefreqs[p]<-sum(loctable[xsamples[popinfo[xsamples]==p],
                                                  paste(L,al,sep=".")])/totgenomes[p]
            }
            freqtable<-cbind(freqtable,theseallelefreqs)
        }
    }

    # return data frame
    names(freqtable)<-c("Genomes",dimnames(gentable)[[2]])
    return(freqtable)
}

calcFst<-function(freqs, pops=row.names(freqs),
                  loci=unique(as.matrix(as.data.frame(strsplit(names(freqs), split=".",
                                                      fixed=TRUE),
                                             stringsAsFactors=FALSE))[1,])){
    # Clean up loci
    loci<-loci[loci!="Genomes"]
    # Set up matrix for Fst values
    fsts<-matrix(0,nrow=length(pops),ncol=length(pops),dimnames=list(pops,pops))
    # Get genome number from the table
    genomes<-freqs$Genomes
    names(genomes)<-pops
    for(m in 1:length(pops)){
        for(n in m:length(pops)){
            # set up array for HT and HS values
            hets<-array(0,dim=c(length(loci),2),dimnames=list(loci,c("HT","HS")))
            for(L in loci){
                # get just the frequencies for these pops and this locus
                thesefreqs<-freqs[c(pops[m],pops[n]),grep(L,names(freqs),fixed=TRUE)]
                # get average allele frequencies weighted by genomes/pop
                avgfreq<-(thesefreqs[1,]*genomes[pops[m]] + thesefreqs[2,]*genomes[pops[n]])/
                    (genomes[pops[m]] + genomes[pops[n]])
                # estimate H by 1 - sum of squared allele frequencies
                # put the heterozygositites in the array
                hets[L,"HT"]<-1-sum(avgfreq^2)
                hets[L,"HS"]<-((1-sum(thesefreqs[1,]^2))*genomes[pops[m]] +
                               (1-sum(thesefreqs[2,]^2))*genomes[pops[n]])/
                                   (genomes[pops[m]] + genomes[pops[n]])
            }
            HT<-mean(hets[,"HT"])
            HS<-mean(hets[,"HS"])
            fsts[m,n]<-(HT-HS)/HT
            fsts[n,m]<-(HT-HS)/HT
        }
    }
    # return matrix of Fst values
    return(fsts)
}

read.SPAGeDi<-function(infile, allelesep="/", returncatspatcoord=FALSE,
                       returnploidies=FALSE, missing=-9){
    # get all lines from the file, and eliminate comment lines
    Lines<-readLines(infile)
    Lines<-Lines[Lines != ""]
    first2<-sapply(Lines, substring, first=1, last=2)
    Lines<-Lines[first2 != "//"]
    # get data from the first line
    fileinfo<-as.integer(strsplit(Lines[1], "\t")[[1]])
    numind<-fileinfo[1] # number of samples
    numcat<-fileinfo[2] # number of categories
    numsc<-fileinfo[3] # number of spatial coordinates
    numloc<-fileinfo[4] # number of loci
    digits<-fileinfo[5] # number of digits to represent alleles

    # Is there a column of categories?
    if(numcat==0){
        catpres<-0
    } else {
        catpres<-1
    }
    # Is latitude and longitude used instead of Cartesian coordinates?
    if(numsc==-2){
        numsc<-2
    }

    # read the rest of the file as a table
    cat(Lines[3:(3+numind)], sep="\n", file="SpagTemp.txt")
    gentable <- read.table("SpagTemp.txt", sep="\t", header=TRUE,
                           row.names=1,
                           colClasses=c("character",rep(NA,catpres+numsc),
                           rep("character",numloc)))
    # get sample and locus names
    samples<-row.names(gentable)
    loci<-names(gentable)[(length(gentable)-numloc+1):length(gentable)]

    # set up list to contain genotypes
    gendata <- array(list(missing), dim=c(length(samples), length(loci)),
                     dimnames=list(samples,loci))
    # set up vector to contain ploidies
    indploidies <- rep(4, length(samples))
    names(indploidies) <- samples

    # If there is no separation of alleles, count digits off with substring
    if(allelesep==""){
        for(s in samples){
            #set up a list to contain genotypes
            thesegenotypes<-list(0)
            length(thesegenotypes)<-length(loci)
            names(thesegenotypes)<-loci

            for(L in loci){
                # add leading zeros if necessary
                while(nchar(gentable[s,L])%%digits !=0){
                    gentable[s,L]<-paste("0",gentable[s,L],sep="")
                }
                # split into alleles and convert to integers
                thesegenotypes[[L]]<-as.integer(substring(gentable[s,L],
                                               first=seq(1,
                                               nchar(gentable[s,L])-digits+1,
                                               by=digits),
                                               last=seq(digits,
                                               nchar(gentable[s,L]),by=digits)))
                # if genotype only has zeros, write missing data symbol
                if(length(unique(thesegenotypes[[L]]))==1 && thesegenotypes[[L]][1]==0){
                    thesegenotypes[[L]]<-missing
                }
                # otherwise remove zeros on left
                while(thesegenotypes[[L]][1]==0){
                    thesegenotypes[[L]]<-thesegenotypes[[L]][-1]
                }
            }
            # get ploidy of sample
            indploidies[s] <- max(sapply(thesegenotypes,length))

            for(L in loci){
                # remove zeros on the right
                thesegenotypes[[L]]<-thesegenotypes[[L]][thesegenotypes[[L]] != 0]
                # get unique alleles
                thesegenotypes[[L]]<-unique(thesegenotypes[[L]])
            }
            # add genotypes to list
            gendata[s,]<-thesegenotypes
        }

    # get alleles by strsplit
    } else {
        for(s in samples){
            # get alleles by splitting the strings
            thesegenotypes<-sapply(gentable[s,loci],strsplit,
                                   split=allelesep,fixed=TRUE)
            names(thesegenotypes) <- loci

            for(L in loci){
                # convert to integer
                thesegenotypes[[L]]<-as.integer(thesegenotypes[[L]])
                # if genotype only has zeros, write missing data symbol
                if(length(unique(thesegenotypes[[L]]))==1 && thesegenotypes[[L]][1]==0){
                    thesegenotypes[[L]]<-missing
                }
                # otherwise remove zeros on left
                while(thesegenotypes[[L]][1]==0){
                    thesegenotypes[[L]]<-thesegenotypes[[L]][-1]
                }
            }
            # get ploidy of sample
            indploidies[s] <- max(sapply(thesegenotypes,length))

            for(L in loci){
                # remove zeros on the right
                thesegenotypes[[L]]<-thesegenotypes[[L]][thesegenotypes[[L]] != 0]
                # get unique alleles
                thesegenotypes[[L]]<-unique(thesegenotypes[[L]])
            }
            # add genotypes to list
            gendata[s,]<-thesegenotypes
        }
    }

    # return the genotypes, and other data as applicable
    if(identical(c(returncatspatcoord,returnploidies), c(FALSE,FALSE))){
        return(gendata)
    } else {
        return(list(CatSpatCoord=gentable[,!names(gentable) %in% loci],
                    Indploidies=indploidies,
                    Genotypes=gendata)[c(returncatspatcoord,
                    returnploidies,TRUE)])
    }
}

write.SPAGeDi<-function(gendata,samples=dimnames(gendata)[[1]],
                        loci=dimnames(gendata)[[2]],
                        indploidies=rep(4,length(samples)),
                        popinfo=rep(1,length(samples)),
                        allelesep="/", digits=2, file="",
                        spatcoord=data.frame(X=rep(1,length(samples)),
                                             Y=rep(1,length(samples)),
                                             row.names=samples),
                        usatnts=rep(2, length(loci)),
                        missing=-9){
    # name indploidies, popinfo, usatnts, and if not already done
    if(is.null(names(indploidies))){names(indploidies)<-samples}
    if(is.null(names(popinfo))){names(popinfo)<-samples}
    if(is.null(names(usatnts))){names(usatnts)<-loci}
    if(identical(row.names(spatcoord), as.character(1:dim(spatcoord)[1]))){
        row.names(spatcoord)<-samples}

    # set up data frame to contain genotypes
    gentable<-data.frame(Ind=samples, Cat=popinfo[samples], spatcoord[samples,])
    # find missing data
    misstable<-find.missing.gen(gendata[samples,loci], missing=missing)
    # get genotype data by locus (column)
    for(L in loci){
        genotypesL<-gendata[samples,L]
        missingsamples<-misstable$Sample[misstable$Locus==L]
        # replace missing data with zeros
        genotypesL[missingsamples]<-0
        # divide and subtract to convert to repeats
        divallele<-function(value){floor(value/usatnts[L])}
        genotypesL<-mapply(divallele,genotypesL,SIMPLIFY=FALSE)
        suballele<-function(value){if(value[1]!=0){
            value-(10^(digits-1))} else{
            0}}
        while(max(mapply(max,genotypesL)) >= 10^digits){
            genotypesL<-mapply(suballele, genotypesL, SIMPLIFY=FALSE)
        }
        # add zeros up to correct ploidy, delete alleles if necessary
        zerostoadd<-indploidies[samples] - mapply(length,genotypesL)
        names(zerostoadd)<-samples
        for(s in samples){
            if(length(genotypesL[[s]])==1){
                # replicate allele if totally homozygous
                genotypesL[[s]]<-rep(genotypesL[[s]],indploidies[s])
            } else {
                if(zerostoadd[s] < 0){
                    # randomly remove alleles if there are too many
                    genotypesL[[s]]<-sample(genotypesL[[s]],indploidies[s],
                                            replace=FALSE)
                    cat("Alleles randomly removed to get to ploidy:",L,s,"\n")
                } else {
                    # add zeros for partial heterozygotes
                    genotypesL[[s]]<-c(genotypesL[[s]],rep(0,zerostoadd[s]))
                }
            }
            # also make each allele the right number of digits if necessary
            if(allelesep==""){
                genotypesL[[s]]<-as.character(genotypesL[[s]])
                for(a in 1:length(genotypesL[[s]])){
                    while(nchar(genotypesL[[s]][a]) < digits){
                        genotypesL[[s]][a]<-paste(0,genotypesL[[s]][a],sep="")
                    }
                }
            }
        }
        # concatenate into strings
        genvect<-mapply(paste,genotypesL,collapse=allelesep)
        # add the vector to the data frame
        gentable<-data.frame(gentable,genvect)
        names(gentable)[dim(gentable)[2]]<-L
    }
    # write file
    write.table(gentable,file="SpagTemp.txt",sep="\t",row.names=FALSE,
                col.names=TRUE,quote=FALSE)
    cat(paste(length(samples),length(unique(popinfo[samples])),dim(spatcoord)[2],
                length(loci),digits,max(indploidies[samples]),sep="\t"),"0",
        readLines("SpagTemp.txt"),"END",sep="\n",file=file)
}
